# Typing helpers used for annotations
from typing import List, Union
from pathlib import Path
# Base classes to inherit from
from langchain_community.document_loaders.base import BaseLoader
from abc import ABC
# Tools for extracting text from Word files (subprocess runs the antiword CLI for .doc; docx2txt handles .docx)
import subprocess
import docx2txt


class DocReader(BaseLoader, ABC):
    """Extract the text of a legacy Word ".doc" file with the ``antiword`` CLI.

    The ``antiword`` executable must be installed and reachable on ``PATH``;
    it is run as a child process each time :meth:`load` is called.
    """

    def __init__(self, file_path: Union[str, Path]):
        """Initialize with file path.

        Args:
            file_path: Location of the ".doc" file to read. It is stored
                as given and only handed to ``antiword`` when
                :meth:`load` runs.
        """
        self.file_path = file_path

    def __del__(self):
        # Nothing to clean up: the reader only stores a path and keeps no
        # open handles or child processes between calls.
        pass

    def load(self) -> str:
        """Get file data.

        Returns:
            The document text produced by ``antiword``, decoded as UTF-8.
            NOTE(review): this is a plain ``str``, not a list of LangChain
            ``Document`` objects as other loaders typically return --
            confirm callers expect a string.

        Raises:
            FileNotFoundError: If the ``antiword`` executable cannot be
                found.
            subprocess.CalledProcessError: If ``antiword`` exits with a
                non-zero status (e.g. unreadable or non-Word input).
            UnicodeDecodeError: If the output is not valid UTF-8.
        """
        # Without an explicit mapping antiword picks its output character
        # set from the process locale; under a non-UTF-8 locale (e.g. "C")
        # the bytes would not be UTF-8 and the decode below would garble or
        # reject them. "-m UTF-8.txt" pins the output encoding to UTF-8.
        output = subprocess.check_output(
            ['antiword', '-m', 'UTF-8.txt', self.file_path]
        )
        return output.decode('utf-8')